This is hopefully the results section for the Study 2 NSE & SE babies watching ASL Stories. We have two main factors:
- Language (Sign v. English)
- Direction (Forward v. Reversed)
Ages of babies are from 5 to 14.99 months. That means we lose two “old” CODA babies - Janine (16 mo) and za02 (23 mo). The dataset also contains factors for younger v. older babies IF we want to do age group splits. I do not recommend it because we lose all significance that way and we have more precise age information, better than brute groups. IF we do use it, the baby age groups are such:
- Younger babies = 5 to 8.99 months
- Older babies = 9.0 to 14.99 months
To skip to the XY Data, go here. To skip to the AOI/FCR analysis, go here.
Demographics
library(tidyverse)
library(janitor)
library(lme4)
library(lmerTest)
library(scales)
library(feather)
library(GGally)
# Grab data that was produced in 03importcleanbabies.Rmd
babies <- read_feather("cleanedbabyeyedata.feather") %>%
mutate(age = age*12) %>%
select(participant, language, age, gender, story, direction, mark, trial, repetition, aoi, secs, percent) %>%
rename(name = participant) %>%
mutate(agegroup = case_when(
age <= 8.99 ~ "younger",
age >= 9.0 & age < 15 ~ "older"
)) %>%
filter(!is.na(agegroup)) %>%
mutate(language = case_when(
language == "english" ~ "NSE",
language =="sign" ~ "SE"
)) %>%
rename(lang = language) %>%
mutate(lang2 = case_when(
name == "aubrey CODA 11m" ~ "HSE",
name == "GemmaF_11_4_13_CODA" ~ "HSE",
name == "Hannah_CODA_7m" ~ "HSE",
name == "ke11es12_7m_MomTerp" ~ "LSE",
name == "Miles_14m_SE" ~ "HSE",
name == "Parker 6 m SIGNING" ~ "HSE",
name == "Penn 6 months SIGN EXPOSED" ~ "HSE",
name == "Trinity 8m" ~ "LSE",
name == "Victoria07_18_17" ~ "LSE",
name == "Wells_8mos" ~ "LSE",
TRUE ~ lang
))
# filter(name != "Victoria07_18_17")
babiesinfo <- babies %>%
select(name, lang, age, gender) %>%
distinct() %>%
group_by(lang) %>%
summarise(N = n(),
age_mean = mean(age),
sd = sd(age),
min = min(age),
max = max(age))
genders <- babies %>%
select(name, lang, age, gender) %>%
distinct() %>%
group_by(lang, gender) %>%
summarise(N = n()) %>%
spread(gender, N)
babiesinfo <- left_join(babiesinfo, genders) %>%
select(lang, N, Female, Male, age_mean, sd, min, max) %>%
print()
babies$agegroup <- fct_relevel(babies$agegroup, c("younger","older"))
# IF we do age groups, use this code
#
# babiesinfo <- babies %>%
# select(name, lang, age, agegroup, gender) %>%
# distinct() %>%
# group_by(lang, agegroup) %>%
# summarise(N = n(),
# age_mean = mean(age),
# sd = sd(age),
# min = min(age),
# max = max(age))
#
# genders <- babies %>%
# select(name, lang, age, agegroup, gender) %>%
# distinct() %>%
# group_by(lang, agegroup, gender) %>%
# summarise(N = n()) %>%
# spread(gender, N)
#
# babiesinfo <- left_join(babiesinfo, genders) %>%
# select(lang, agegroup, N, Female, Male, age_mean, sd, min, max) %>%
# print()
Global Looking
We calculated percentages based on overall clip length as the denominator. In this way, we can meaningfully contrast looking times at the videos (which are variable lengths) based on different factors. But when we go to AOI analysis we need to re-calculate the percentages so the denominator is based on total looking time, not overall clip length.
The chart below shows little differences based on factors Age, Language, or Direction. That’s good, means the videos were equally engaging for all babies, right?
babies$lang <- as.factor(babies$lang)
babies_overall_looking <- babies %>%
group_by(name, age, lang, direction, story, repetition) %>%
summarise(percent = sum(percent)) # gets total looking percent for each trial for each baby
# Table of means
babies_overall_looking %>%
group_by(name, lang, direction) %>%
summarise(percent = mean(percent)) %>% # get average looking percent for each baby
group_by(lang, direction) %>%
summarise(mean_percent = mean(percent),
count = n(),
sd = sd(percent),
se = sd/sqrt(count)) %>%
select(-sd) %>%
print()
ggplot(babies_overall_looking, aes(x = age, y = percent, color = direction, fill = direction)) +
geom_jitter(alpha = 0.5) +
facet_grid(. ~ lang) +
geom_smooth(method = "lm", se = FALSE) +
ggtitle("Video Attention") +
xlab("age (months)") +
ylab("percent looking") +
theme_bw() +
scale_y_continuous(limits = c(0,1), labels = percent)

# Plot
# babies_overall_looking %>%
# group_by(lang, direction, name) %>%
# summarise(percent = mean(percent)) %>% # gets average looking percent for each baby
# group_by(lang, direction) %>%
# summarise(mean_percent = mean(percent), # gets group averages
# count = n(),
# sd = sd(percent),
# se = sd/sqrt(count)) %>%
# ggplot(aes(x = lang, y = mean_percent, fill = direction)) +
# geom_col(position = "dodge") +
# geom_errorbar(aes(ymin = mean_percent - se, ymax = mean_percent + se),
# position = position_dodge(width = 0.9), width = 0.25) +
# scale_y_continuous(limits = c(0,1), labels = percent) +
# theme_minimal() +
# theme(panel.grid.major.x = element_blank()) +
# # facet_wrap("lang") +
# ggtitle("Video Attention") +
# xlab("") +
# ylab("percent looking")
# babies_overall_looking %>%
# ggplot(aes(x = lang, y = percent, fill = direction)) +
# facet_wrap("agegroup") +
# geom_violin()
A linear model shows there were no significant effects of any factors. We can conclude there was no effect on how much the babies looked at the stimuli.
global_lm <- lmer(percent ~ age + lang * direction + (direction|name) + (direction|story), data = babies_overall_looking)
summary(global_lm)
Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: percent ~ age + lang * direction + (direction | name) + (direction | story)
Data: babies_overall_looking
REML criterion at convergence: -109.5
Scaled residuals:
Min 1Q Median 3Q Max
-2.17439 -0.71127 -0.01207 0.70541 2.47192
Random effects:
Groups Name Variance Std.Dev. Corr
name (Intercept) 0.0103084 0.10153
directionreversed 0.0009282 0.03047 1.00
story (Intercept) 0.0086180 0.09283
directionreversed 0.0039286 0.06268 -0.76
Residual 0.0330635 0.18183
Number of obs: 349, groups: name, 26; story, 8
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 0.679103 0.086923 28.572445 7.813 1.43e-08 ***
age 0.002021 0.008959 23.305300 0.226 0.824
langSE -0.049624 0.050507 23.785326 -0.983 0.336
directionreversed -0.033434 0.034821 12.347582 -0.960 0.355
langSE:directionreversed 0.019263 0.041732 94.210799 0.462 0.645
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) age langSE drctnr
age -0.854
langSE -0.051 -0.197
dirctnrvrsd -0.228 0.006 0.068
lngSE:drctn 0.034 -0.001 -0.126 -0.497
AOI Looking
Now we’ll re-calculate the percentages so the denominator is based on total looking time. All AOIs should sum up to 100% for each trial and each baby. Next let’s make a boxplot of all AOIs.
#Recalculate percent
babies <- babies %>%
select(-percent) %>%
group_by(name, lang, agegroup, age, direction, story, mark, trial, repetition, gender) %>%
mutate(totalsec = sum(secs)) %>%
group_by(name, lang, agegroup, age, direction, story, mark, trial, repetition, gender, aoi) %>%
mutate(percent = secs/totalsec)
# Boxplot
babies %>%
ggplot(aes(x = aoi, y = percent, fill = direction)) +
geom_boxplot() +
ggtitle("AOI Attention") +
theme_bw() +
xlab("") +
theme(axis.text.x = element_text(angle=45, hjust = 1),
panel.grid.major.x = element_blank()) +
scale_y_continuous(labels = scales::percent, limits = c(0,1))

It appears two important AOIs are MidChestTop and MidFaceBottom. Let’s look again only at midline AOIs:
midline = c("Belly","BelowChest","MidChestBottom","MidChestCenter","MidChestTop",
"MidFaceBottom","MidFaceCenter","MidFaceTop")
babies %>%
filter(aoi %in% midline) %>%
ggplot(aes(x = aoi, y = percent, fill = direction)) +
geom_boxplot() +
ggtitle("Midline AOI Attention") +
theme_bw() +
xlab("") +
theme(axis.text.x = element_text(angle=45, hjust = 1),
panel.grid.major.x = element_blank()) +
scale_y_continuous(labels = scales::percent, limits = c(0,1))

I’m going to run linear models with only MidChestTop or MidFaceBottom, and see what happens. No age interactions.
MidChestTop:
- Significant effect of age (p = 0.004) - older babies look at MidChestTop AOI more.
- Significant effect of language (p = 0.006) - NSE babies look +14.6% more than SE babies.
- Significant effect of direction (p = 0.046) - Babies look about 4.5% less for reversed
- No language X direction interaction.
MidFaceBottom:
- No effect of age
- Significant effect of language (p = 0.004) - SE babies look +21% more than NSE babies.
- Marginal effect of language (p = 0.073) - Babies look about 2% more for reversed.
- No language X direction interaction.
babies %>%
filter(aoi %in% c("MidFaceBottom","MidChestTop")) %>%
ggplot(aes(x = age, y = percent, color = direction, fill = direction)) +
geom_jitter(alpha = 0.5) +
geom_smooth(method = "lm", se = FALSE) +
scale_y_continuous(limits = c(0,1), labels = percent) +
theme_bw() +
# theme(panel.grid.major.x = element_blank()) +
facet_grid(aoi ~ lang) +
ggtitle("AOI Attention") +
xlab("") +
ylab("percent looking")

midchesttop_lm <- lmer(percent ~ age + lang * direction + (1|name) + (1|story), data = filter(babies, aoi == "MidChestTop"))
summary(midchesttop_lm)
Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: percent ~ age + lang * direction + (1 | name) + (1 | story)
Data: filter(babies, aoi == "MidChestTop")
REML criterion at convergence: -215.2
Scaled residuals:
Min 1Q Median 3Q Max
-3.0246 -0.7130 -0.1414 0.6022 2.6752
Random effects:
Groups Name Variance Std.Dev.
name (Intercept) 0.0114441 0.1070
story (Intercept) 0.0002891 0.0170
Residual 0.0253365 0.1592
Number of obs: 349, groups: name, 26; story, 8
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 0.110030 0.076921 23.643715 1.430 0.16568
age 0.029452 0.008477 22.192885 3.474 0.00213 **
langSE -0.134837 0.050436 27.916006 -2.673 0.01240 *
directionreversed -0.043631 0.022400 320.218547 -1.948 0.05232 .
langSE:directionreversed 0.038910 0.034773 317.931790 1.119 0.26400
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) age langSE drctnr
age -0.912
langSE -0.076 -0.188
dirctnrvrsd -0.137 -0.002 0.213
lngSE:drctn 0.084 0.006 -0.336 -0.645
#ggcoef(midchesttop_lm)
midfacebottom_lm <- lmer(percent ~ age + lang * direction + (1|name) + (1|story), data = filter(babies, aoi == "MidFaceBottom"))
summary(midfacebottom_lm)
Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: percent ~ age + lang * direction + (1 | name) + (1 | story)
Data: filter(babies, aoi == "MidFaceBottom")
REML criterion at convergence: -157.6
Scaled residuals:
Min 1Q Median 3Q Max
-2.42325 -0.77817 -0.02519 0.70580 2.50539
Random effects:
Groups Name Variance Std.Dev.
name (Intercept) 0.019877 0.14099
story (Intercept) 0.002498 0.04998
Residual 0.028516 0.16887
Number of obs: 349, groups: name, 26; story, 8
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 0.334139 0.099651 25.233307 3.353 0.00253 **
age -0.008939 0.010886 22.948912 -0.821 0.42004
langSE 0.193693 0.063648 26.900910 3.043 0.00518 **
directionreversed 0.038727 0.023881 317.208664 1.622 0.10587
langSE:directionreversed -0.052616 0.036996 315.990174 -1.422 0.15595
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) age langSE drctnr
age -0.903
langSE -0.065 -0.193
dirctnrvrsd -0.112 -0.002 0.179
lngSE:drctn 0.069 0.006 -0.283 -0.647
#ggcoef(midfacebottom_lm)
# Bar chart
# babies %>%
# filter(aoi %in% c("MidFaceBottom","MidChestTop")) %>%
# group_by(agegroup, lang, direction, name, aoi) %>%
# summarise(percent = mean(percent)) %>% # gets average looking percent for each baby
# group_by(agegroup, lang, direction, aoi) %>%
# summarise(mean_percent = mean(percent), # gets group averages
# count = n(),
# sd = sd(percent),
# se = sd/sqrt(count)) %>%
# ggplot(aes(x = lang, y = mean_percent, fill = direction)) +
# geom_col(position = "dodge") +
# geom_errorbar(aes(ymin = mean_percent - se, ymax = mean_percent + se),
# position = position_dodge(width = 0.9), width = 0.25) +
# scale_y_continuous(limits = c(0,1), labels = percent) +
# theme_minimal() +
# theme(panel.grid.major.x = element_blank()) +
# facet_grid(aoi ~ agegroup) +
# ggtitle("Video Attention") +
# xlab("") +
# ylab("percent looking")
Face-Chest Ratio
Next, we’ll define a Face-Chest Ratio (FCR) such that:
- MidFaceCenter, MidFaceBottom = Face
- MidChestTop, MidChestCenter, MidChestBottom, BelowChest = Chest
- FCR = face - chest / face + chest
We did not include Belly or MidFaceTop because of very low looking rates according to the boxplots above.
babies_fcr <- babies %>%
spread(aoi,percent) %>%
group_by(name, age, agegroup, lang, gender, direction, story, repetition) %>%
summarise(face = sum(MidFaceCenter, MidFaceBottom, na.rm = TRUE),
chest = sum(MidChestTop, MidChestCenter, MidChestBottom, BelowChest, na.rm = TRUE),
fcr = (face - chest) / (face + chest))
# Table of means
babies_fcr %>%
group_by(lang, direction, name) %>%
summarise(fcr = mean(fcr)) %>% # gets average looking percent for each baby
group_by(lang, direction) %>%
summarise(mean_fcr = mean(fcr), # gets group averages
count = n(),
sd = sd(fcr),
se = sd/sqrt(count)) %>%
select(-sd) %>%
print()
# Plot
ggplot(babies_fcr, aes(x = age, y = fcr, color = direction, fill = direction)) +
geom_jitter(alpha = 0.5) +
geom_smooth(method = "lm", se = FALSE) +
scale_y_continuous(limits = c(-1,1)) +
theme_bw() +
theme(panel.grid.major.x = element_blank()) +
facet_grid(. ~ lang) +
ggtitle("Face-Chest Ratios") +
xlab("") +
ylab("FCR")

# Bar chart
# babies_fcr %>%
# group_by(agegroup, lang, direction, name) %>%
# summarise(fcr = mean(fcr)) %>% # gets average looking percent for each baby
# group_by(agegroup, lang, direction) %>%
# summarise(mean_fcr = mean(fcr), # gets group averages
# count = n(),
# sd = sd(fcr),
# se = sd/sqrt(count)) %>%
# ggplot(aes(x = lang, y = mean_fcr, fill = direction)) +
# geom_col(position = "dodge") +
# geom_errorbar(aes(ymin = mean_fcr - se, ymax = mean_fcr + se),
# position = position_dodge(width = 0.9), width = 0.25) +
# scale_y_continuous(limits = c(-1,1)) +
# theme_minimal() +
# theme(panel.grid.major.x = element_blank()) +
# facet_wrap("agegroup") +
# ggtitle("Face-Chest Ratios") +
# xlab("") +
# ylab("FCR")
What will a linear mixed model tell us? (with no age interactions)
- No effect of age. Interesting. Maybe because we don’t have that many babies.
- Strong effect of language: SE babies have overall +0.50 FCR than NSE babies (p = 0.008); in other words, they are more attracted to the face.
- Effect of direction: Reversal increases FCR by +0.12 across all babies (p = 0.032). Interesting.
- Weak language x direction interaction. It seems NSE babies have a bigger reversal effect than SE babies? Trying to judge from the heat maps. (p = 0.095). BUt it’s so weak so possibly not worth mentioning.
fcr_lm <- lmer(fcr ~ age + lang * direction + (1|name) + (1|story), data = babies_fcr)
summary(fcr_lm)
Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: fcr ~ age + lang * direction + (1 | name) + (1 | story)
Data: babies_fcr
REML criterion at convergence: 435.5
Scaled residuals:
Min 1Q Median 3Q Max
-2.2135 -0.7622 0.1098 0.6343 2.3489
Random effects:
Groups Name Variance Std.Dev.
name (Intercept) 0.14776 0.3844
story (Intercept) 0.01342 0.1158
Residual 0.15708 0.3963
Number of obs: 349, groups: name, 26; story, 8
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 0.07490 0.26654 24.59417 0.281 0.7811
age -0.04443 0.02930 22.87598 -1.516 0.1431
langSE 0.45427 0.16972 25.84156 2.677 0.0127 *
directionreversed 0.11887 0.05605 316.96084 2.121 0.0347 *
langSE:directionreversed -0.14367 0.08683 315.78391 -1.655 0.0990 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) age langSE drctnr
age -0.908
langSE -0.060 -0.196
dirctnrvrsd -0.098 -0.002 0.158
lngSE:drctn 0.060 0.005 -0.249 -0.647
Heat Maps
And now heat maps!
heatmap_babies <- babies %>%
filter(aoi %in% midline) %>%
ungroup() %>%
group_by(lang, name, direction, aoi) %>%
summarise(percent = mean(percent, na.rm=TRUE)) %>%
group_by(lang, direction, aoi) %>%
summarise(percent = mean(percent, na.rm=TRUE)) %>%
ungroup() %>%
mutate(aoi = factor(aoi, levels = c("Belly","BelowChest","MidChestBottom","MidChestCenter","MidChestTop",
"MidFaceBottom","MidFaceCenter","MidFaceTop")))
ggplot(heatmap_babies, aes(x = lang, y = aoi)) +
geom_tile(aes(fill=percent),color="lightgray",na.rm=TRUE) +
# scale_fill_viridis(option = "viridis", direction=-1, limits = c(0,.7), labels = percent, name = "looking time") +
scale_fill_gradient(low = "#ffffff", high = "#08519c", space = "Lab", limits = c(0,.451), labels = percent, name = "looking time", na.value = "grey50") +
theme_bw() +
theme(strip.text.x = element_text(size = 11, color = "black", face = "italic"),
strip.background = element_rect(colour = "white", fill = "white"),
panel.grid.major = element_line(color = "white")) +
facet_grid(. ~ direction) +
ylab("") + xlab("") + ggtitle("Eye Gaze Heat Map, by Direction") +
scale_y_discrete(expand=c(0,0)) +
scale_x_discrete(expand = c(0,0))

ggplot(heatmap_babies, aes(x = direction, y = aoi)) +
geom_tile(aes(fill=percent),color="lightgray",na.rm=TRUE) +
# scale_fill_viridis(option = "viridis", direction=-1, limits = c(0,.7), labels = percent, name = "looking time") +
scale_fill_gradient(low = "#ffffff", high = "#08519c", space = "Lab", limits = c(0,.451), labels = percent, name = "looking time", na.value = "grey50") +
theme_bw() +
theme(strip.text.x = element_text(size = 11, color = "black", face = "italic"),
strip.background = element_rect(colour = "white", fill = "white"),
panel.grid.major = element_line(color = "white")) +
facet_grid(. ~ lang) +
ylab("") + xlab("") + ggtitle("Eye Gaze Heat Map, by Language") +
scale_y_discrete(expand=c(0,0)) +
scale_x_discrete(expand = c(0,0))

Different Ways to Visualize Reversal
I want to try to visualize reversal effects a different way. Maybe this.
# Get participant-level data
babies_fcr2 <- babies_fcr %>%
group_by(name, age, lang, direction) %>%
summarise(fcr = mean(fcr))
# reversal_effect_lm <- lmer(fcr ~ age + lang * direction + (1|name), data = babies_fcr2)
# summary(reversal_effect_lm)
ggplot(babies_fcr2, aes(x = direction, y = fcr, color = lang, fill = lang)) +
geom_point() +
geom_line(aes(group = name)) +
facet_grid(. ~ lang) +
scale_y_continuous(limits = c(-1,1)) +
theme_bw()

Or a reversal effect chart? Okay, so this chart tells us overall there really wasn’t much of a reversal effect for SE babies, they’re all hovering around 0. Interesting. While there seems to be a reversal effect for NSE babies where they look at the face more during reversed stories!
# Get participant-level data
babies_fcr3 <- babies_fcr2 %>%
spread(direction, fcr) %>%
group_by(name, age, lang) %>%
summarise(diff = forward - reversed)
ggplot(babies_fcr3, aes(x = age, y = diff, color = lang)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
scale_y_continuous(limits = c(-1,1)) +
theme_bw() +
ggtitle("Reversal Effect") +
ylab("Forward FCR - Reversed FCR")

This analysis of within-subject variation here:
# First get the mean of each trial, THEN the participant-level means
within_subjects <- babies_fcr %>%
group_by(name, lang, direction, story, repetition) %>%
summarise(fcr = mean(fcr, na.rm = TRUE),
count = n()) %>%
group_by(name, lang, direction) %>%
summarise(mean = mean(fcr, na.rm = TRUE),
se = sd(fcr, na.rm = TRUE)/sqrt(n()),
count = n())
# Then spread out mean and SE columns by direction
within_subjects_means <- within_subjects %>%
select(-se, -count) %>%
spread(direction, mean, sep = "_")
within_subjects_se <- within_subjects %>%
select(-mean, -count) %>%
spread(direction, se, sep = "SE")
within_subjects <- left_join(within_subjects_means, within_subjects_se, by = c("name","lang"))
# Now let's plot
lims <- c(-1,1)
within_subjects %>%
ggplot(aes(x = direction_forward, y = direction_reversed, color = lang)) +
geom_abline() +
geom_point(size = 2) +
geom_errorbar(aes(ymin=direction_reversed-directionSEreversed, ymax=direction_reversed+directionSEreversed)) +
geom_errorbarh(aes(xmin=direction_forward-directionSEforward, xmax=direction_forward+directionSEforward)) +
theme_bw() +
theme(aspect.ratio = 1) +
scale_x_continuous("forward", limits = c(-1,1)) +
scale_y_continuous("reversed", limits = c(-1,1)) +
ggtitle("FCR Means") +
facet_wrap("lang")

And a classic box/error plot with age collapsed.
babies_fcr2 %>%
group_by(lang, direction) %>%
summarise(fcr_mean = mean(fcr),
sd = sd(fcr),
n = n(),
se = sd/sqrt(n)) %>%
ggplot(aes(x = lang, y = fcr_mean, fill = direction)) +
geom_bar(stat = "identity", position = position_dodge()) +
geom_errorbar(aes(ymin = fcr_mean-se, ymax = fcr_mean+se), position = position_dodge(0.9), width = 0.2) +
scale_y_continuous(limits = c(-0.5, 0.5)) +
theme_linedraw()

Discussion re AOI/anatomical data
The biggest change is we lost the interaction between language and direction. For the ICSLA abstract we reported a strong interaction (p = 0.01), but now it’s p = 0.10. Shoot.
The interpretation here is that:
- All babies looked equally at all videos regardless of language or direction. Good!
- SE babies are stronger face-lookers than NSE babies. (Same as ICSLA)
- Reversal appears to affect NSE babies more than SE babies? (Marginal). This is unlike ICSLA.
XY Space Data
We’ll load the data from the childxydata.feather file made in 06rawxydata.Rmd. So any new babies, please run the first code block in 06 to include it. Then we’ll keep all the babies we also have in the AOI data group.
included <- babies %>%
ungroup() %>%
select(name) %>%
distinct() %>%
unlist()
xydata <- read_feather("../Child Data/childxydata.feather") %>%
rename(name = participant) %>%
filter(name %in% included)
# Get ages
ages <- read_csv("childrenages.csv") %>%
rename(name = participant)
xydata <- xydata %>% left_join(ages, by = "name") %>%
mutate(age = age*12) %>%
mutate(agegroup = case_when(
age <= 8.99 ~ "younger",
age >= 9.0 & age < 15 ~ "older"
)) %>%
mutate(language = case_when(
language == "EnglishExposed" ~ "NSE",
language == "SignLanguageExposed" ~ "SE"
)) %>%
rename(lang = language) %>%
select(name, group, gender, lang, condition, mark, trial, repetition, x, y, age, agegroup) %>%
separate(condition, into = c("story", "clip", "direction")) %>%
unite("story", c("story", "clip")) %>%
mutate(direction = case_when(
direction == "ER" ~ "reversed",
direction == "FW" ~ "forward"
)) %>%
mutate(name = factor(name),
group = factor(group),
gender = factor(gender),
lang = factor(lang),
story = factor(story),
direction = factor(direction),
mark = factor(mark),
trial = factor(trial),
repetition = factor(repetition),
agegroup = factor(agegroup))
Overall Looking
Let’s check that we have no significant group or condition differences in terms of valid (not empty) data points collected. This is same as “Global Looking” we have above, really, but w raw xy data.
xy_overall <- xydata %>%
filter(!is.na(x)) %>%
group_by(name, age, lang, direction, story, repetition) %>%
summarise(data_points = n()) # gets total looking percent for each trial for each baby
# Table of means
xy_overall %>%
group_by(name, lang, direction) %>%
summarise(data_points = mean(data_points)) %>% # get average looking percent for each baby
group_by(lang, direction) %>%
summarise(mean_data_points = mean(data_points),
count = n(),
sd = sd(data_points),
se = sd/sqrt(count)) %>%
select(-sd) %>%
print()
ggplot(xy_overall, aes(x = age, y = data_points, color = direction, fill = direction)) +
geom_jitter(alpha = 0.5) +
facet_grid(. ~ lang) +
geom_smooth(method = "lm", se = FALSE) +
ggtitle("Data Points") +
xlab("age (months)") +
ylab("data points recorded") +
theme_bw()

A linear model shows there were no significant effects of any factors. We can conclude there was no effect on how much data was collected from the babies.
overall_xy_lm <- lmer(data_points ~ age + lang * direction + (direction|name) + (direction|story), data = xy_overall)
summary(overall_xy_lm)
Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: data_points ~ age + lang * direction + (direction | name) + (direction | story)
Data: xy_overall
REML criterion at convergence: 5528.3
Scaled residuals:
Min 1Q Median 3Q Max
-3.7588 -0.6663 -0.0078 0.6278 2.5542
Random effects:
Groups Name Variance Std.Dev. Corr
name (Intercept) 20748.3 144.04
directionreversed 827.6 28.77 1.00
story (Intercept) 22549.1 150.16
directionreversed 4215.1 64.92 -0.05
Residual 52385.3 228.88
Number of obs: 401, groups: name, 26; story, 8
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 524.548 120.054 29.519 4.369 0.000141 ***
age 8.531 12.060 23.322 0.707 0.486339
langSE 8.304 68.321 23.886 0.122 0.904275
directionreversed -20.437 38.068 11.693 -0.537 0.601428
langSE:directionreversed -16.638 48.592 131.756 -0.342 0.732595
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) age langSE drctnr
age -0.828
langSE -0.040 -0.204
dirctnrvrsd -0.045 -0.003 0.063
lngSE:drctn 0.026 0.002 -0.130 -0.499
XY Data LMMs
Now we’re going to run LMMs on babies’ raw:
- horizontal spread (middle 50% of x data; xIQR)
- vertical spread (middle 50% of y data; yIQR)
- viewing area (A = middle-x * middle-y; area)
But to do this we first trim each baby’s data, getting rid of the first 60 samples (0.5 secs) of each trial.
xydata <- xydata %>%
group_by(name,trial) %>%
slice(60:n())
iqr <- xydata %>%
group_by(name, age, lang, story, direction, trial) %>%
summarise(xIQR = IQR(x,na.rm=TRUE),
yIQR = IQR(y,na.rm=TRUE),
xmed = median(x, na.rm=TRUE),
ymed = median(y, na.rm=TRUE),
area = xIQR*yIQR)
head(iqr,20)
Middle X
Results tell us that there is a significant effect of age where horizontal spread grew narrower with older babies (p = 0.024; slope = -2.2 px/month). There was a marginal effect of language where SE babies have slightly narrower horizontal spread (11 px; p = 0.088). No effects of reversal or interactions.
xiqr_mean <- iqr %>%
group_by(lang, direction, name) %>%
summarise(xIQR = mean(xIQR, na.rm = T)) %>% # gets average looking percent for each baby
group_by(lang, direction) %>%
summarise(mean_xIQR = mean(xIQR), # gets group averages
count = n(),
sd = sd(xIQR),
se = sd/sqrt(count)) %>%
select(-sd) %>%
print()
# Plot
ggplot(iqr, aes(x = age, y = xIQR, color = direction, fill = direction)) +
geom_jitter(alpha = 0.5) +
geom_smooth(method = "lm", se = FALSE) +
theme_bw() +
theme(panel.grid.major.x = element_blank()) +
facet_grid(. ~ lang) +
ggtitle("Horizontal Spread") +
xlab("") +
ylab("xIQR")

ggplot(xiqr_mean, aes(x = lang, y = mean_xIQR, fill = direction)) +
geom_bar(stat = "identity", position = position_dodge()) +
geom_errorbar(aes(ymin = mean_xIQR-se, ymax = mean_xIQR+se), position = position_dodge(0.9), width = 0.2) +
theme_linedraw()

xiqr_lm <- lmer(xIQR ~ age + lang * direction + (1|name) + (1|story), data = iqr)
summary(xiqr_lm)
Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: xIQR ~ age + lang * direction + (1 | name) + (1 | story)
Data: iqr
REML criterion at convergence: 3839.2
Scaled residuals:
Min 1Q Median 3Q Max
-2.1407 -0.4782 -0.1683 0.2668 9.8629
Random effects:
Groups Name Variance Std.Dev.
name (Intercept) 104.0925 10.2026
story (Intercept) 0.4767 0.6905
Residual 901.8472 30.0308
Number of obs: 398, groups: name, 26; story, 8
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 63.5887 8.5596 25.6516 7.429 7.5e-08 ***
age -2.2423 0.9342 23.2148 -2.400 0.0248 *
langSE -12.0971 6.0828 41.5383 -1.989 0.0533 .
directionreversed 1.9022 3.8627 372.0710 0.492 0.6227
langSE:directionreversed 0.1790 6.1817 369.3456 0.029 0.9769
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) age langSE drctnr
age -0.901
langSE -0.106 -0.175
dirctnrvrsd -0.213 -0.006 0.308
lngSE:drctn 0.135 0.002 -0.504 -0.625
Middle Y
We had a significant effect of age where vertical spread got narrower with older babies (p = 0.019, slope = -5.5 px/month). There were no effect of language, direction, or interactions.
yiqr_mean <- iqr %>%
group_by(lang, direction, name) %>%
summarise(yIQR = mean(yIQR, na.rm = T)) %>% # gets average looking percent for each baby
group_by(lang, direction) %>%
summarise(mean_yIQR = mean(yIQR), # gets group averages
count = n(),
sd = sd(yIQR),
se = sd/sqrt(count)) %>%
select(-sd) %>%
print()
# Plot
ggplot(iqr, aes(x = age, y = yIQR, color = direction, fill = direction)) +
geom_jitter(alpha = 0.5) +
geom_smooth(method = "lm", se = FALSE) +
theme_bw() +
theme(panel.grid.major.x = element_blank()) +
facet_grid(. ~ lang) +
ggtitle("Vertical Spread") +
xlab("") +
ylab("yIQR")

ggplot(yiqr_mean, aes(x = lang, y = mean_yIQR, fill = direction)) +
geom_bar(stat = "identity", position = position_dodge()) +
geom_errorbar(aes(ymin = mean_yIQR-se, ymax = mean_yIQR+se), position = position_dodge(0.9), width = 0.2) +
theme_linedraw()

yiqr_lm <- lmer(yIQR ~ age + lang * direction + (1|name) + (1|story), data = iqr)
summary(yiqr_lm)
Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: yIQR ~ age + lang * direction + (1 | name) + (1 | story)
Data: iqr
REML criterion at convergence: 4251.3
Scaled residuals:
Min 1Q Median 3Q Max
-2.7516 -0.5310 -0.2210 0.3696 5.8948
Random effects:
Groups Name Variance Std.Dev.
name (Intercept) 6.890e+02 2.625e+01
story (Intercept) 7.045e-12 2.654e-06
Residual 2.480e+03 4.979e+01
Number of obs: 398, groups: name, 26; story, 8
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 119.653 19.332 24.309 6.189 2.03e-06 ***
age -5.506 2.136 23.024 -2.578 0.0168 *
langSE -16.804 13.040 32.003 -1.289 0.2068
directionreversed 8.778 6.407 371.087 1.370 0.1715
langSE:directionreversed -5.713 10.252 370.630 -0.557 0.5777
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) age langSE drctnr
age -0.912
langSE -0.079 -0.187
dirctnrvrsd -0.156 -0.005 0.238
lngSE:drctn 0.099 0.001 -0.390 -0.625
Viewing Area
Again, age is significant (p = 0.01) such that area gets smaller with age (about 500 sq. px. per month). No effect of language or direction or interactions.
area_mean <- iqr %>%
group_by(lang, direction, name) %>%
summarise(area = mean(area, na.rm = T)) %>% # gets average looking percent for each baby
group_by(lang, direction) %>%
summarise(area_mean = mean(area), # gets group averages
count = n(),
sd = sd(area),
se = sd/sqrt(count)) %>%
select(-sd) %>%
print()
# Plot
ggplot(iqr, aes(x = age, y = area, color = direction, fill = direction)) +
geom_jitter(alpha = 0.5) +
geom_smooth(method = "lm", se = FALSE) +
theme_bw() +
theme(panel.grid.major.x = element_blank()) +
facet_grid(. ~ lang) +
ggtitle("Viewing Area") +
xlab("") +
ylab("Area (px^2)")

ggplot(area_mean, aes(x = lang, y = area_mean, fill = direction)) +
geom_bar(stat = "identity", position = position_dodge()) +
geom_errorbar(aes(ymin = area_mean-se, ymax = area_mean+se), position = position_dodge(0.9), width = 0.2) +
theme_linedraw()

area_lm <- lmer(area ~ age + lang * direction + (1|name) + (1|story), data = iqr)
summary(area_lm)
Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: area ~ age + lang * direction + (1 | name) + (1 | story)
Data: iqr
REML criterion at convergence: 8154.5
Scaled residuals:
Min 1Q Median 3Q Max
-1.6788 -0.2727 -0.1278 0.0299 13.4544
Random effects:
Groups Name Variance Std.Dev.
name (Intercept) 6.231e+06 2.496e+03
story (Intercept) 4.677e-10 2.163e-05
Residual 5.292e+07 7.275e+03
Number of obs: 398, groups: name, 26; story, 8
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 9372.80 2085.26 25.71 4.495 0.00013 ***
age -589.26 227.76 23.24 -2.587 0.01640 *
langSE -2048.69 1480.52 41.32 -1.384 0.17386
directionreversed 712.93 935.49 372.10 0.762 0.44648
langSE:directionreversed -303.67 1497.33 371.32 -0.203 0.83940
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) age langSE drctnr
age -0.902
langSE -0.105 -0.176
dirctnrvrsd -0.211 -0.006 0.307
lngSE:drctn 0.134 0.002 -0.502 -0.625
Plotting Viewing Area
medians <- iqr %>%
group_by(name,lang,direction) %>%
summarise(xIQR = median(xIQR,na.rm=TRUE),
yIQR = median(yIQR,na.rm=TRUE),
xmed = median(xmed,na.rm=TRUE),
ymed = median(ymed,na.rm=TRUE)) %>%
group_by(lang,direction) %>%
summarise(xIQR = mean(xIQR,na.rm=TRUE),
yIQR = mean(yIQR,na.rm=TRUE),
x = median(xmed,na.rm=TRUE),
y = median(ymed,na.rm=TRUE)) %>%
mutate(y = y*-1,
xmin = x-(xIQR/2),
xmax = x+(xIQR/2),
ymin = y-(yIQR/2),
ymax = y+(yIQR/2))
img <- png::readPNG("cindy.png")
g <- grid::rasterGrob(img, interpolate=TRUE, width=unit(1,"npc"), height=unit(1,"npc"))
ggplot(medians, aes(fill=direction,color=direction)) +
annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) +
theme_linedraw() +
scale_x_continuous(limits = c(0,1080), expand = c(0, 0)) +
scale_y_continuous(limits = c(-720,0), expand = c(0, 0)) +
facet_wrap("lang")

Puppies
Get puppy data!
# puppies1 <- read_tsv("../Child Data/_puppies/all puppies group 1 sums.txt", na = "-") %>%
# clean_names() %>%
# select(-c(age, analysis, check_if_want_scatterplot, gender, language)) %>%
# rename(name = x1) %>%
# mutate(mean = rowMeans(.[2:10], na.rm = T)) %>%
# select(name, mean)
#
# puppies2 <- read_tsv("../Child Data/_puppies/all puppies group 2 sums.txt", na = "-") %>%
# clean_names() %>%
# select(-c(age, analysis, check_if_want_scatterplot, gender, language)) %>%
# rename(name = x1) %>%
# mutate(mean = rowMeans(.[2:17], na.rm = T)) %>%
# select(name, mean)
#
# puppies <- rbind(puppies1, puppies2)
#
# participants <- xydata %>%
# select(name, group, gender, lang) %>%
# distinct()
#
# puppies <- left_join(participants, puppies, by = "name")
#
# summary(lm(data = puppies, mean ~ lang))
#
# puppies_se <- filter(puppies, lang == "NSE")
# puppies_nse <- filter(puppies, lang == "SE")
#
# t.test(puppies_se$mean, puppies_nse$mean)
# Let's do this again but preserve puppy-level data
puppies1 <- read_tsv("../Child Data/_puppies/all puppies group 1 sums.txt", na = "-") %>%
clean_names() %>%
select(-c(age, analysis, check_if_want_scatterplot, gender, language)) %>%
rename(name = x1) %>%
gather(key = puppy, value = sec, -name) %>%
mutate(pups = case_when(
str_detect(puppy, "huskies") ~ "huskies",
str_detect(puppy, "golden") ~ "golden",
str_detect(puppy, "wawa") ~ "wawa",
str_detect(puppy, "frisby") ~ "frisby",
str_detect(puppy, "bulldog") ~ "bulldog",
str_detect(puppy, "puppy_jpg") ~ "puppy",
TRUE ~ puppy
))
puppies2 <- read_tsv("../Child Data/_puppies/all puppies group 2 sums.txt", na = "-") %>%
clean_names() %>%
select(-c(age, analysis, check_if_want_scatterplot, gender, language)) %>%
rename(name = x1) %>%
gather(key = puppy, value = sec, -name) %>%
mutate(pups = case_when(
str_detect(puppy, "huskies") ~ "huskies",
str_detect(puppy, "golden") ~ "golden",
str_detect(puppy, "wawa") ~ "wawa",
str_detect(puppy, "frisby") ~ "frisby",
str_detect(puppy, "bulldog") ~ "bulldog",
str_detect(puppy, "puppies") ~ "puppies",
TRUE ~ puppy
))
puppies <- rbind(puppies1,puppies2)
participants <- xydata %>%
select(name, group, gender, lang) %>%
distinct()
puppies <- left_join(participants, puppies, by = "name")
summary(lmer(data = puppies, sec ~ lang + (1|name) + (1|pups)))
Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: sec ~ lang + (1 | name) + (1 | pups)
Data: puppies
REML criterion at convergence: 15818.6
Scaled residuals:
Min 1Q Median 3Q Max
-2.9738 -0.5348 -0.0747 0.3391 8.2862
Random effects:
Groups Name Variance Std.Dev.
name (Intercept) 0.8587 0.9267
pups (Intercept) 0.1115 0.3339
Residual 1.8390 1.3561
Number of obs: 4550, groups: name, 26; pups, 7
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 2.1716 0.2655 29.2732 8.178 4.76e-09 ***
langSE -0.5248 0.3759 23.9367 -1.396 0.176
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr)
langSE -0.545
ggplot(puppies, aes(x = lang, y = sec, fill = lang)) + geom_boxplot()

---
title: "Babies - Study 2 - Results"
output: 
  html_notebook:
    code_folding: hide
    theme: spacelab
    highlight: tango
    toc: yes
    toc_depth: 2
    toc_float: yes
    df_print: paged
---

This is hopefully the results section for the Study 2 NSE & SE babies watching ASL Stories. We have two main factors: 

1. Language (Sign v. English)
1. Direction (Forward v. Reversed)

Ages of babies are from 5 to 14.99 months. That means we lose two "old" CODA babies - Janine (16 mo) and za02 (23 mo). The dataset also contains factors for younger v. older babies *IF* we want to do age group splits. I do not recommend it because we lose all significance that way and we have more precise age information, better than brute groups.  IF we do use it, the baby age groups are such:

* Younger babies = 5 to 8.99 months
* Older babies = 9.0 to 14.99 months

To skip to the XY Data, go here. 
To skip to the AOI/FCR analysis, go here. 

# Demographics
```{r message=FALSE, warning=FALSE}
library(tidyverse)
library(janitor)
library(lme4)
library(lmerTest)
library(scales)
library(feather)
library(GGally)

# Grab data that was produced in 03importcleanbabies.Rmd
babies <- read_feather("cleanedbabyeyedata.feather") %>%
  mutate(age = age*12) %>%
  select(participant, language, age, gender, story, direction, mark, trial, repetition, aoi, secs, percent) %>%
  rename(name = participant) %>%
  mutate(agegroup = case_when(
    age <= 8.99 ~ "younger",
    age >= 9.0 & age < 15 ~ "older"
  )) %>%
  filter(!is.na(agegroup)) %>%
  mutate(language = case_when(
    language == "english" ~ "NSE",
    language =="sign" ~ "SE"
  )) %>%
  rename(lang = language) %>%
  mutate(lang2 = case_when(
    name == "aubrey CODA 11m" ~ "HSE",
    name == "GemmaF_11_4_13_CODA" ~ "HSE",
    name == "Hannah_CODA_7m" ~ "HSE",
    name == "ke11es12_7m_MomTerp" ~ "LSE",
    name == "Miles_14m_SE" ~ "HSE",
    name == "Parker 6 m SIGNING" ~ "HSE",
    name == "Penn 6 months SIGN EXPOSED" ~ "HSE",
    name == "Trinity 8m" ~ "LSE",
    name == "Victoria07_18_17" ~ "LSE",
    name == "Wells_8mos" ~ "LSE",
    TRUE ~ lang
  ))
#  filter(name != "Victoria07_18_17")

babiesinfo <- babies %>%
  select(name, lang, age, gender) %>%
  distinct() %>%
  group_by(lang) %>%
  summarise(N = n(),
            age_mean = mean(age),
            sd = sd(age),
            min = min(age),
            max = max(age))

genders <- babies %>%
  select(name, lang, age, gender) %>%
  distinct() %>%
  group_by(lang, gender) %>%
  summarise(N = n()) %>%
  spread(gender, N)

babiesinfo <- left_join(babiesinfo, genders) %>%
  select(lang, N, Female, Male, age_mean, sd, min, max) %>%
  print()

babies$agegroup <- fct_relevel(babies$agegroup, c("younger","older"))


# IF we do age groups, use this code
# 
# babiesinfo <- babies %>%
#   select(name, lang, age, agegroup, gender) %>%
#   distinct() %>%
#   group_by(lang, agegroup) %>%
#   summarise(N = n(),
#             age_mean = mean(age),
#             sd = sd(age),
#             min = min(age),
#             max = max(age))
# 
# genders <- babies %>%
#   select(name, lang, age, agegroup, gender) %>%
#   distinct() %>%
#   group_by(lang, agegroup, gender) %>%
#   summarise(N = n()) %>%
#   spread(gender, N)
# 
# babiesinfo <- left_join(babiesinfo, genders) %>%
#   select(lang, agegroup, N, Female, Male, age_mean, sd, min, max) %>%
#   print()

```

# Global Looking

We calculated percentages *based on overall clip length* as the denominator. In this way, we can meaningfully contrast looking times at the videos (which are variable lengths) based on different factors. But when we go to AOI analysis we need to re-calculate the percentages so the denominator is based on total looking time, not overall clip length. 

The chart below shows little differences based on factors Age, Language, or Direction. That's good, means the videos were equally engaging for all babies, right? 

```{r}
babies$lang <- as.factor(babies$lang)
babies_overall_looking <- babies %>%
  group_by(name, age, lang, direction, story, repetition) %>%
  summarise(percent = sum(percent)) # gets total looking percent for each trial for each baby

# Table of means
babies_overall_looking %>% 
  group_by(name, lang, direction) %>%
  summarise(percent = mean(percent)) %>% # get average looking percent for each baby
  group_by(lang, direction) %>%
  summarise(mean_percent = mean(percent),
            count = n(),
            sd = sd(percent),
            se = sd/sqrt(count)) %>%
  select(-sd) %>%
  print()

ggplot(babies_overall_looking, aes(x = age, y = percent, color = direction, fill = direction)) + 
  geom_jitter(alpha = 0.5) +
  facet_grid(. ~ lang) +
  geom_smooth(method = "lm", se = FALSE) +
  ggtitle("Video Attention") +
  xlab("age (months)") +
  ylab("percent looking") + 
  theme_bw() + 
  scale_y_continuous(limits = c(0,1), labels = percent) 


# Plot
# babies_overall_looking %>% 
#   group_by(lang, direction, name) %>%
#   summarise(percent = mean(percent)) %>% # gets average looking percent for each baby
#   group_by(lang, direction) %>%
#   summarise(mean_percent = mean(percent), # gets group averages
#             count = n(),
#             sd = sd(percent),
#             se = sd/sqrt(count)) %>% 
#   ggplot(aes(x = lang, y = mean_percent, fill = direction)) + 
#   geom_col(position = "dodge") + 
#   geom_errorbar(aes(ymin = mean_percent - se, ymax = mean_percent + se), 
#                 position = position_dodge(width = 0.9), width = 0.25) + 
#   scale_y_continuous(limits = c(0,1), labels = percent) +
#   theme_minimal() + 
#   theme(panel.grid.major.x = element_blank()) +
# #  facet_wrap("lang") +
#   ggtitle("Video Attention") +
#   xlab("") +
#   ylab("percent looking")

# babies_overall_looking %>%
#   ggplot(aes(x = lang, y = percent, fill = direction)) +
#   facet_wrap("agegroup") + 
#   geom_violin()
```

A linear model shows there were no significant effects of any factors. We can conclude there was no effect on how *much* the babies looked at the stimuli.  

```{r}
global_lm <- lmer(percent ~ age + lang * direction + (direction|name) + (direction|story), data = babies_overall_looking)
summary(global_lm) 
#ggcoef(global_lm)
```

# AOI Looking
Now we'll re-calculate the percentages so the denominator is based on total looking time. All AOIs should sum up to 100% for each trial and each baby. Next let's make a boxplot of all AOIs. 

```{r}
#Recalculate percent
babies <- babies %>%
  select(-percent) %>%
  group_by(name, lang, agegroup, age, direction, story, mark, trial, repetition, gender) %>%
  mutate(totalsec = sum(secs)) %>%
  group_by(name, lang, agegroup, age, direction, story, mark, trial, repetition, gender, aoi) %>%
  mutate(percent = secs/totalsec)

# Boxplot
babies %>%
  ggplot(aes(x = aoi, y = percent, fill = direction)) + 
  geom_boxplot() +
  ggtitle("AOI Attention") +
  theme_bw() + 
  xlab("") +
  theme(axis.text.x = element_text(angle=45, hjust = 1),
        panel.grid.major.x = element_blank()) +
  scale_y_continuous(labels = scales::percent, limits = c(0,1))
```
It appears two important AOIs are MidChestTop and MidFaceBottom. Let's look again only at midline AOIs:

```{r}
midline = c("Belly","BelowChest","MidChestBottom","MidChestCenter","MidChestTop",
            "MidFaceBottom","MidFaceCenter","MidFaceTop")
babies %>%
  filter(aoi %in% midline) %>%
  ggplot(aes(x = aoi, y = percent, fill = direction)) + 
  geom_boxplot() +
  ggtitle("Midline AOI Attention") +
  theme_bw() + 
  xlab("") +
  theme(axis.text.x = element_text(angle=45, hjust = 1),
        panel.grid.major.x = element_blank()) +
  scale_y_continuous(labels = scales::percent, limits = c(0,1))
```

I'm going to run linear models with only MidChestTop or MidFaceBottom, and see what happens. No age interactions.

**MidChestTop:**

* Significant effect of age (p = 0.004) - older babies look at MidChestTop AOI more.
* Significant effect of language (p = 0.006) - NSE babies look +14.6% more than SE babies. 
* Significant effect of direction (p = 0.046) - Babies look about 4.5% less for reversed
* No language X direction interaction. 

**MidFaceBottom:** 

* No effect of age
* Significant effect of language (p = 0.004) - SE babies look +21% more than NSE babies.
* Marginal effect of language (p = 0.073) - Babies look about 2% more for reversed. 
* No language X direction interaction.

```{r}
babies %>%
  filter(aoi %in% c("MidFaceBottom","MidChestTop")) %>%
  ggplot(aes(x = age, y = percent, color = direction, fill = direction)) + 
  geom_jitter(alpha = 0.5) + 
  geom_smooth(method = "lm", se = FALSE) +
  scale_y_continuous(limits = c(0,1), labels = percent) +
  theme_bw() + 
#  theme(panel.grid.major.x = element_blank()) +
  facet_grid(aoi ~ lang) +
  ggtitle("AOI Attention") +
  xlab("") +
  ylab("percent looking")

midchesttop_lm <- lmer(percent ~ age + lang * direction + (1|name) + (1|story), data = filter(babies, aoi == "MidChestTop"))
summary(midchesttop_lm)
#ggcoef(midchesttop_lm)

midfacebottom_lm <- lmer(percent ~ age + lang * direction + (1|name) + (1|story), data = filter(babies, aoi == "MidFaceBottom"))
summary(midfacebottom_lm)
#ggcoef(midfacebottom_lm)

# Bar chart
# babies %>%
#   filter(aoi %in% c("MidFaceBottom","MidChestTop")) %>%
#   group_by(agegroup, lang, direction, name, aoi) %>%
#   summarise(percent = mean(percent)) %>% # gets average looking percent for each baby
#   group_by(agegroup, lang, direction, aoi) %>%
#   summarise(mean_percent = mean(percent), # gets group averages
#             count = n(),
#             sd = sd(percent),
#             se = sd/sqrt(count)) %>% 
#   ggplot(aes(x = lang, y = mean_percent, fill = direction)) + 
#   geom_col(position = "dodge") + 
#   geom_errorbar(aes(ymin = mean_percent - se, ymax = mean_percent + se), 
#                 position = position_dodge(width = 0.9), width = 0.25) + 
#   scale_y_continuous(limits = c(0,1), labels = percent) +
#   theme_minimal() + 
#   theme(panel.grid.major.x = element_blank()) +
#   facet_grid(aoi ~ agegroup) +
#   ggtitle("Video Attention") +
#   xlab("") +
#   ylab("percent looking")
```


# Face-Chest Ratio
Next, we'll define a Face-Chest Ratio (FCR) such that:

1. MidFaceCenter, MidFaceBottom = Face
1. MidChestTop, MidChestCenter, MidChestBottom, BelowChest = Chest
1. FCR = face - chest / face + chest

We did not include Belly or MidFaceTop because of very low looking rates according to the boxplots above.

```{r}
babies_fcr <- babies %>%
  spread(aoi,percent) %>%
  group_by(name, age, agegroup, lang, gender, direction, story, repetition) %>%
  summarise(face = sum(MidFaceCenter, MidFaceBottom, na.rm = TRUE),
         chest = sum(MidChestTop, MidChestCenter, MidChestBottom, BelowChest, na.rm = TRUE),
         fcr = (face - chest) / (face + chest))

# Table of means
babies_fcr %>% 
  group_by(lang, direction, name) %>%
  summarise(fcr = mean(fcr)) %>% # gets average looking percent for each baby
  group_by(lang, direction) %>%
  summarise(mean_fcr = mean(fcr), # gets group averages
            count = n(),
            sd = sd(fcr),
            se = sd/sqrt(count)) %>%
  select(-sd) %>%
  print()

# Plot
ggplot(babies_fcr, aes(x = age, y = fcr, color = direction, fill = direction)) + 
  geom_jitter(alpha = 0.5) + 
  geom_smooth(method = "lm", se = FALSE) +
  scale_y_continuous(limits = c(-1,1)) +
  theme_bw() + 
  theme(panel.grid.major.x = element_blank()) +
  facet_grid(. ~ lang) +
  ggtitle("Face-Chest Ratios") +
  xlab("") +
  ylab("FCR")

# Bar chart
# babies_fcr %>% 
#   group_by(agegroup, lang, direction, name) %>%
#   summarise(fcr = mean(fcr)) %>% # gets average looking percent for each baby
#   group_by(agegroup, lang, direction) %>%
#   summarise(mean_fcr = mean(fcr), # gets group averages
#             count = n(),
#             sd = sd(fcr),
#             se = sd/sqrt(count)) %>% 
#   ggplot(aes(x = lang, y = mean_fcr, fill = direction)) + 
#   geom_col(position = "dodge") + 
#   geom_errorbar(aes(ymin = mean_fcr - se, ymax = mean_fcr + se), 
#                 position = position_dodge(width = 0.9), width = 0.25) + 
#   scale_y_continuous(limits = c(-1,1)) +
#   theme_minimal() + 
#   theme(panel.grid.major.x = element_blank()) +
#   facet_wrap("agegroup") +
#   ggtitle("Face-Chest Ratios") +
#   xlab("") +
#   ylab("FCR")
```

What will a linear mixed model tell us? (with no age interactions)

* No effect of age. Interesting. Maybe because we don't have that many babies. 
* Strong effect of language: SE babies have overall +0.50 FCR than NSE babies (p = 0.008); in other words, they are more attracted to the face. 
* Effect of direction: Reversal *increases* FCR by +0.12 across all babies (p = 0.032). Interesting.
* Weak language x direction interaction. It seems NSE babies have a bigger reversal effect than SE babies? Trying to judge from the heat maps. (p = 0.095). BUt it's so weak so possibly not worth mentioning.


```{r}
fcr_lm <- lmer(fcr ~ age + lang * direction + (1|name) + (1|story), data = babies_fcr)
summary(fcr_lm)
#ggcoef(fcr_lm)
```

# Heat Maps
And now heat maps!

```{r}
heatmap_babies <- babies %>%
  filter(aoi %in% midline) %>%
  ungroup() %>%
  group_by(lang, name, direction, aoi) %>%
  summarise(percent = mean(percent, na.rm=TRUE)) %>%
  group_by(lang, direction, aoi) %>%
  summarise(percent = mean(percent, na.rm=TRUE)) %>%
  ungroup() %>%
  mutate(aoi = factor(aoi, levels = c("Belly","BelowChest","MidChestBottom","MidChestCenter","MidChestTop",
            "MidFaceBottom","MidFaceCenter","MidFaceTop")))

ggplot(heatmap_babies, aes(x = lang, y = aoi)) +
  geom_tile(aes(fill=percent),color="lightgray",na.rm=TRUE) + 
#  scale_fill_viridis(option = "viridis", direction=-1, limits = c(0,.7), labels = percent, name = "looking time") +
    scale_fill_gradient(low = "#ffffff", high = "#08519c", space = "Lab", limits = c(0,.451), labels = percent, name = "looking time", na.value = "grey50") +
  theme_bw() +
  theme(strip.text.x = element_text(size = 11, color = "black", face = "italic"), 
        strip.background = element_rect(colour = "white", fill = "white"),
        panel.grid.major = element_line(color = "white")) +
  facet_grid(. ~ direction) +
  ylab("") + xlab("") + ggtitle("Eye Gaze Heat Map, by Direction") + 
  scale_y_discrete(expand=c(0,0)) +
  scale_x_discrete(expand = c(0,0))

ggplot(heatmap_babies, aes(x = direction, y = aoi)) +
  geom_tile(aes(fill=percent),color="lightgray",na.rm=TRUE) + 
#  scale_fill_viridis(option = "viridis", direction=-1, limits = c(0,.7), labels = percent, name = "looking time") +
    scale_fill_gradient(low = "#ffffff", high = "#08519c", space = "Lab", limits = c(0,.451), labels = percent, name = "looking time", na.value = "grey50") +
  theme_bw() +
  theme(strip.text.x = element_text(size = 11, color = "black", face = "italic"), 
        strip.background = element_rect(colour = "white", fill = "white"),
        panel.grid.major = element_line(color = "white")) +
  facet_grid(. ~ lang) +
  ylab("") + xlab("") + ggtitle("Eye Gaze Heat Map, by Language") + 
  scale_y_discrete(expand=c(0,0)) +
  scale_x_discrete(expand = c(0,0))
```

# Different Ways to Visualize Reversal
I want to try to visualize reversal effects a different way. Maybe this. 

```{r}
# Get participant-level data
babies_fcr2 <- babies_fcr %>%
  group_by(name, age, lang, direction) %>%
  summarise(fcr = mean(fcr))

# reversal_effect_lm <- lmer(fcr ~ age + lang * direction + (1|name), data = babies_fcr2)
# summary(reversal_effect_lm)

ggplot(babies_fcr2, aes(x = direction, y = fcr, color = lang, fill = lang)) +
  geom_point() +
  geom_line(aes(group = name)) +
  facet_grid(. ~ lang) + 
  scale_y_continuous(limits = c(-1,1)) +
  theme_bw()

```

Or a reversal effect chart? Okay, so this chart tells us overall there really wasn't much of a reversal effect for SE babies, they're all hovering around 0. Interesting. While there seems to be a reversal effect for NSE babies where they look at the face more during reversed stories! 

```{r}
# Get participant-level data
babies_fcr3 <- babies_fcr2 %>%
  spread(direction, fcr) %>%
  group_by(name, age, lang) %>%
  summarise(diff = forward - reversed)

ggplot(babies_fcr3, aes(x = age, y = diff, color = lang)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  scale_y_continuous(limits = c(-1,1)) +
  theme_bw() +
  ggtitle("Reversal Effect") +
  ylab("Forward FCR - Reversed FCR")
```

This analysis of within-subject variation here: 

```{r}
# First get the mean of each trial, THEN the participant-level means
within_subjects <- babies_fcr %>%
  group_by(name, lang, direction, story, repetition) %>%
  summarise(fcr = mean(fcr, na.rm = TRUE),
            count = n()) %>%
  group_by(name, lang, direction) %>%
  summarise(mean = mean(fcr, na.rm = TRUE),
            se = sd(fcr, na.rm = TRUE)/sqrt(n()),
            count = n())
# Then spread out mean and SE columns by direction
within_subjects_means <- within_subjects %>%
  select(-se, -count) %>%
  spread(direction, mean, sep = "_")
within_subjects_se <- within_subjects %>%
  select(-mean, -count) %>%
  spread(direction, se, sep = "SE")
within_subjects <- left_join(within_subjects_means, within_subjects_se, by = c("name","lang"))

# Now let's plot
lims <- c(-1,1)
within_subjects %>%
  ggplot(aes(x = direction_forward, y = direction_reversed, color = lang)) +
  geom_abline() +
  geom_point(size = 2) + 
  geom_errorbar(aes(ymin=direction_reversed-directionSEreversed, ymax=direction_reversed+directionSEreversed)) +
  geom_errorbarh(aes(xmin=direction_forward-directionSEforward, xmax=direction_forward+directionSEforward)) +
  theme_bw() +
  theme(aspect.ratio = 1) +
  scale_x_continuous("forward", limits = c(-1,1)) +
  scale_y_continuous("reversed", limits = c(-1,1)) +
  ggtitle("FCR Means") +
  facet_wrap("lang")
```

And a classic box/error plot with age collapsed. 

```{r}
babies_fcr2 %>%
  group_by(lang, direction) %>%
  summarise(fcr_mean = mean(fcr),
            sd = sd(fcr),
            n = n(),
            se = sd/sqrt(n)) %>%
  ggplot(aes(x = lang, y = fcr_mean, fill = direction)) +
  geom_bar(stat = "identity", position = position_dodge()) + 
  geom_errorbar(aes(ymin = fcr_mean-se, ymax = fcr_mean+se), position = position_dodge(0.9), width = 0.2) +
  scale_y_continuous(limits = c(-0.5, 0.5)) +
  theme_linedraw()

```

# Discussion re AOI/anatomical data
The biggest change is we lost the interaction between language and direction. For the ICSLA abstract we reported a strong interaction (p = 0.01), but now it's p = 0.10. Shoot. 

The interpretation here is that:

* All babies looked equally at all videos regardless of language or direction. Good!
* SE babies are stronger face-lookers than NSE babies. (Same as ICSLA)
* Reversal appears to affect NSE babies more than SE babies? (Marginal). This is unlike ICSLA. 

```{r eval=FALSE, fig.height=12, fig.width=12, include=FALSE}
# Correlations

# Let's try correlations
babies_nse <- babies %>% 
  filter(aoi %in% midline) %>%
  filter(lang == "NSE") %>%
  group_by(name, direction, aoi) %>% 
  summarise(percent = mean(percent)) %>%
  ungroup() %>%
  mutate(direction = case_when(
    direction == "forward" ~ "fw",
    direction == "reversed" ~ "rv"
  )) %>% 
  unite(aoi2, direction, aoi, sep = "_") %>%
  spread(aoi2, percent) %>%
  select(-name)

babies_se <- babies %>% 
  filter(aoi %in% midline) %>%
  filter(lang == "SE") %>%
  group_by(name, direction, aoi) %>% 
  summarise(percent = mean(percent)) %>%
  ungroup() %>%
  mutate(direction = case_when(
    direction == "forward" ~ "fw",
    direction == "reversed" ~ "rv"
  )) %>% 
  unite(aoi2, direction, aoi, sep = "_") %>%
  spread(aoi2, percent) %>%
  select(-name)

ggcorr(babies_nse, label = TRUE, label_size = 5, label_round = 2, label_alpha = TRUE, hjust = 0.9, size = 5, color = "grey50", layout.exp = 1) + ggtitle("NSE")

ggcorr(babies_se, label = TRUE, label_size = 5, label_round = 2, label_alpha = TRUE, hjust = 0.9, size = 5, color = "grey50", layout.exp = 1) + ggtitle("SE")

library(corrr)
babies_nse %>% correlate() %>% network_plot(min_cor=0.6) + ggtitle("NSE Babies")
babies_se %>% correlate() %>% network_plot(min_cor=0.6) + ggtitle("SE Babies")
```

# XY Space Data
We'll load the data from the `childxydata.feather` file made in 06rawxydata.Rmd. So any new babies, please run the first code block in 06 to include it. Then we'll keep all the babies we also have in the AOI data group. 

```{r message=FALSE, warning=FALSE}
included <- babies %>%
  ungroup() %>%
  select(name) %>% 
  distinct() %>%
  unlist()

xydata <- read_feather("../Child Data/childxydata.feather") %>%
  rename(name = participant) %>%
  filter(name %in% included)

# Get ages
ages <- read_csv("childrenages.csv") %>%
  rename(name = participant)
xydata <- xydata %>% left_join(ages, by = "name") %>%
  mutate(age = age*12) %>%
  mutate(agegroup = case_when(
    age <= 8.99 ~ "younger",
    age >= 9.0 & age < 15 ~ "older"
  )) %>%
  mutate(language = case_when(
    language == "EnglishExposed" ~ "NSE",
    language == "SignLanguageExposed" ~ "SE"
  )) %>%
  rename(lang = language) %>%
  select(name, group, gender, lang, condition, mark, trial, repetition, x, y, age, agegroup) %>%
  separate(condition, into = c("story", "clip", "direction")) %>%
  unite("story", c("story", "clip")) %>%
  mutate(direction = case_when(
    direction == "ER" ~ "reversed",
    direction == "FW" ~ "forward"
  )) %>%
  mutate(name = factor(name),
         group = factor(group),
         gender = factor(gender),
         lang = factor(lang),
         story = factor(story),
         direction = factor(direction),
         mark = factor(mark),
         trial = factor(trial),
         repetition = factor(repetition),
         agegroup = factor(agegroup))
```

## Overall Looking
Let's check that we have no significant group or condition differences in terms of valid (not empty) data points collected. This is same as "Global Looking" we have above, really, but w raw xy data. 

```{r}
xy_overall <- xydata %>%
  filter(!is.na(x)) %>%
  group_by(name, age, lang, direction, story, repetition) %>%
  summarise(data_points = n()) # gets total looking percent for each trial for each baby

# Table of means
xy_overall %>% 
  group_by(name, lang, direction) %>%
  summarise(data_points = mean(data_points)) %>% # get average looking percent for each baby
  group_by(lang, direction) %>%
  summarise(mean_data_points = mean(data_points),
            count = n(),
            sd = sd(data_points),
            se = sd/sqrt(count)) %>%
  select(-sd) %>%
  print()

ggplot(xy_overall, aes(x = age, y = data_points, color = direction, fill = direction)) + 
  geom_jitter(alpha = 0.5) +
  facet_grid(. ~ lang) +
  geom_smooth(method = "lm", se = FALSE) +
  ggtitle("Data Points") +
  xlab("age (months)") +
  ylab("data points recorded") + 
  theme_bw() 
```


A linear model shows there were no significant effects of any factors. We can conclude there was no effect on how *much* data was collected from the babies.   

```{r}
overall_xy_lm <- lmer(data_points ~ age + lang * direction + (direction|name) + (direction|story), data = xy_overall)
summary(overall_xy_lm) 
#ggcoef(overall_xy_lm)
```

## XY Data LMMs
Now we're going to run LMMs on babies' raw: 

* horizontal spread (middle 50% of x data; xIQR)
* vertical spread (middle 50% of y data; yIQR)
* viewing area (A = middle-x * middle-y; area)

But to do this we first trim each baby's data, getting rid of the first 60 samples (0.5 secs) of each trial. 

```{r}
xydata <- xydata %>%
  group_by(name,trial) %>%
  slice(60:n())

iqr <- xydata %>%
  group_by(name, age, lang, story, direction, trial) %>%
  summarise(xIQR = IQR(x,na.rm=TRUE),
                   yIQR = IQR(y,na.rm=TRUE),
                   xmed = median(x, na.rm=TRUE),
                   ymed = median(y, na.rm=TRUE),
                   area = xIQR*yIQR)
head(iqr,20)

```


### Middle X

Results tell us that there is a significant effect of age where horizontal spread grew narrower with older babies (p = 0.024; slope = -2.2 px/month). There was a marginal effect of language where SE babies have slightly narrower horizontal spread (11 px; p = 0.088). No effects of reversal or interactions.

```{r}
xiqr_mean <- iqr %>% 
  group_by(lang, direction, name) %>%
  summarise(xIQR = mean(xIQR, na.rm = T)) %>% # gets average looking percent for each baby
  group_by(lang, direction) %>%
  summarise(mean_xIQR = mean(xIQR), # gets group averages
            count = n(),
            sd = sd(xIQR),
            se = sd/sqrt(count)) %>%
  select(-sd) %>%
  print()

# Plot
ggplot(iqr, aes(x = age, y = xIQR, color = direction, fill = direction)) + 
  geom_jitter(alpha = 0.5) + 
  geom_smooth(method = "lm", se = FALSE) +
  theme_bw() + 
  theme(panel.grid.major.x = element_blank()) +
  facet_grid(. ~ lang) +
  ggtitle("Horizontal Spread") +
  xlab("") +
  ylab("xIQR")

ggplot(xiqr_mean, aes(x = lang, y = mean_xIQR, fill = direction)) +
  geom_bar(stat = "identity", position = position_dodge()) + 
  geom_errorbar(aes(ymin = mean_xIQR-se, ymax = mean_xIQR+se), position = position_dodge(0.9), width = 0.2) +
  theme_linedraw()

xiqr_lm <- lmer(xIQR ~ age + lang * direction + (1|name) + (1|story), data = iqr)
summary(xiqr_lm)
#ggcoef(xiqr_lm)
```


### Middle Y

We had a significant effect of age where vertical spread got narrower with older babies (p = 0.019, slope = -5.5 px/month). There were no effect of language, direction, or interactions. 

```{r}
yiqr_mean <- iqr %>% 
  group_by(lang, direction, name) %>%
  summarise(yIQR = mean(yIQR, na.rm = T)) %>% # gets average looking percent for each baby
  group_by(lang, direction) %>%
  summarise(mean_yIQR = mean(yIQR), # gets group averages
            count = n(),
            sd = sd(yIQR),
            se = sd/sqrt(count)) %>%
  select(-sd) %>%
  print()

# Plot
ggplot(iqr, aes(x = age, y = yIQR, color = direction, fill = direction)) + 
  geom_jitter(alpha = 0.5) + 
  geom_smooth(method = "lm", se = FALSE) +
  theme_bw() + 
  theme(panel.grid.major.x = element_blank()) +
  facet_grid(. ~ lang) +
  ggtitle("Vertical Spread") +
  xlab("") +
  ylab("yIQR")

ggplot(yiqr_mean, aes(x = lang, y = mean_yIQR, fill = direction)) +
  geom_bar(stat = "identity", position = position_dodge()) + 
  geom_errorbar(aes(ymin = mean_yIQR-se, ymax = mean_yIQR+se), position = position_dodge(0.9), width = 0.2) +
  theme_linedraw()

yiqr_lm <- lmer(yIQR ~ age + lang * direction + (1|name) + (1|story), data = iqr)
summary(yiqr_lm)
#ggcoef(yiqr_lm)
```

### Viewing Area
Again, age is significant (p = 0.01) such that area gets smaller with age (about 500 sq. px. per month). No effect of language or direction or interactions. 

```{r}
area_mean <- iqr %>% 
  group_by(lang, direction, name) %>%
  summarise(area = mean(area, na.rm = T)) %>% # gets average looking percent for each baby
  group_by(lang, direction) %>%
  summarise(area_mean = mean(area), # gets group averages
            count = n(),
            sd = sd(area),
            se = sd/sqrt(count)) %>%
  select(-sd) %>%
  print()

# Plot
ggplot(iqr, aes(x = age, y = area, color = direction, fill = direction)) + 
  geom_jitter(alpha = 0.5) + 
  geom_smooth(method = "lm", se = FALSE) +
  theme_bw() + 
  theme(panel.grid.major.x = element_blank()) +
  facet_grid(. ~ lang) +
  ggtitle("Viewing Area") +
  xlab("") +
  ylab("Area (px^2)")

ggplot(area_mean, aes(x = lang, y = area_mean, fill = direction)) +
  geom_bar(stat = "identity", position = position_dodge()) + 
  geom_errorbar(aes(ymin = area_mean-se, ymax = area_mean+se), position = position_dodge(0.9), width = 0.2) +
  theme_linedraw()

area_lm <- lmer(area ~ age + lang * direction + (1|name) + (1|story), data = iqr)
summary(area_lm)
#ggcoef(area_lm)
```

## Plotting Viewing Area 

```{r}
medians <- iqr %>%
  group_by(name,lang,direction) %>%
  summarise(xIQR = median(xIQR,na.rm=TRUE),
                   yIQR = median(yIQR,na.rm=TRUE),
                   xmed = median(xmed,na.rm=TRUE),
                   ymed = median(ymed,na.rm=TRUE)) %>%
  group_by(lang,direction) %>% 
  summarise(xIQR = mean(xIQR,na.rm=TRUE),
                   yIQR = mean(yIQR,na.rm=TRUE),
                   x = median(xmed,na.rm=TRUE),
                   y = median(ymed,na.rm=TRUE)) %>%
  mutate(y = y*-1,
         xmin = x-(xIQR/2),
         xmax = x+(xIQR/2),
         ymin = y-(yIQR/2),
         ymax = y+(yIQR/2))
img <- png::readPNG("cindy.png")
g <- grid::rasterGrob(img, interpolate=TRUE, width=unit(1,"npc"), height=unit(1,"npc")) 
ggplot(medians, aes(fill=direction,color=direction)) +
  annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
  geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) + 
  theme_linedraw() +
  scale_x_continuous(limits = c(0,1080), expand = c(0, 0)) +
  scale_y_continuous(limits = c(-720,0), expand = c(0, 0)) +
  facet_wrap("lang")
```

# Puppies
Get puppy data!

```{r message=FALSE, warning=FALSE}
# puppies1 <- read_tsv("../Child Data/_puppies/all puppies group 1 sums.txt", na = "-") %>% 
#   clean_names() %>%
#   select(-c(age, analysis, check_if_want_scatterplot, gender, language)) %>%
#   rename(name = x1) %>%
#   mutate(mean = rowMeans(.[2:10], na.rm = T)) %>%
#   select(name, mean)
#   
# puppies2 <- read_tsv("../Child Data/_puppies/all puppies group 2 sums.txt", na = "-") %>% 
#   clean_names() %>%
#   select(-c(age, analysis, check_if_want_scatterplot, gender, language)) %>%
#   rename(name = x1) %>%
#   mutate(mean = rowMeans(.[2:17], na.rm = T)) %>%
#   select(name, mean)
# 
# puppies <- rbind(puppies1, puppies2)
# 
# participants <- xydata %>% 
#   select(name, group, gender, lang) %>% 
#   distinct()
# 
# puppies <- left_join(participants, puppies, by = "name")
# 
# summary(lm(data = puppies, mean ~ lang))
# 
# puppies_se <- filter(puppies, lang == "NSE")
# puppies_nse <- filter(puppies, lang == "SE")
# 
# t.test(puppies_se$mean, puppies_nse$mean)

# Let's do this again but preserve puppy-level data
puppies1 <- read_tsv("../Child Data/_puppies/all puppies group 1 sums.txt", na = "-") %>% 
  clean_names() %>%
  select(-c(age, analysis, check_if_want_scatterplot, gender, language)) %>%
  rename(name = x1) %>%
  gather(key = puppy, value = sec, -name) %>%
  mutate(pups = case_when(
    str_detect(puppy, "huskies") ~ "huskies",
    str_detect(puppy, "golden") ~ "golden",
    str_detect(puppy, "wawa") ~ "wawa",
    str_detect(puppy, "frisby") ~ "frisby",
    str_detect(puppy, "bulldog") ~ "bulldog",
    str_detect(puppy, "puppy_jpg") ~ "puppy",
    TRUE ~ puppy
  ))

puppies2 <- read_tsv("../Child Data/_puppies/all puppies group 2 sums.txt", na = "-") %>% 
  clean_names() %>%
  select(-c(age, analysis, check_if_want_scatterplot, gender, language)) %>%
  rename(name = x1) %>%
  gather(key = puppy, value = sec, -name) %>%
  mutate(pups = case_when(
    str_detect(puppy, "huskies") ~ "huskies",
    str_detect(puppy, "golden") ~ "golden",
    str_detect(puppy, "wawa") ~ "wawa",
    str_detect(puppy, "frisby") ~ "frisby",
    str_detect(puppy, "bulldog") ~ "bulldog",
    str_detect(puppy, "puppies") ~ "puppies",
    TRUE ~ puppy
  ))

puppies <- rbind(puppies1,puppies2)

participants <- xydata %>%
  select(name, group, gender, lang) %>%
  distinct()

puppies <- left_join(participants, puppies, by = "name")

summary(lmer(data = puppies, sec ~ lang + (1|name) + (1|pups)))

ggplot(puppies, aes(x = lang, y = sec, fill = lang)) + geom_boxplot()
```

